home *** CD-ROM | disk | FTP | other *** search
- Subject: bm - odd address optimization
- Newsgroups: mod.sources
- Approved: jpn@panda.UUCP
-
- Mod.sources: Volume 5, Issue 16
- Submitted by: talcott!topaz!lll-crg!csustan!casey
-
- [ I haven't tried this, but I bet you odd-address-machine people will
- be happy to see this fix - moderator
- ]
-
- Hi,
- I've just recently porteed bm to a PDP-11 runnung under the August seismo
- distribution of BSD2.9. It turns out this wasn't an easy process at all and
- I was more that 3 hours into it before I found out why bm ran correctly, but
- almost 4 times slower than fgrep! The problem was bm doing I/O on odd
- addresses and/or going for odd length transfers. The problem rests partially
- with the PDP-11 architecture and partially with the implementation of copyout
- in the kernel. I'll be fixing copyout (and copyin of course), but the PDP's
- architectual problems remain. And as the PDP isn't the only machine that
- has biases about odd addresses, I thought I'd forward on the necessary changes
- to you. Hope these are useful!
-
- Leith (Casey) Leedom lll-crg.arpa!csustan!casey
- Computer Science Department work: (209) 667-3185
- California State University, Stanislaus home: (209) 634-2775
- Turlock, CA 95380
-
- P.S. I'm only forwarding bm.c, Execute.c, and MoveResidue.c as they're the
- only one's that needed changing.
-
- ---- cut here ---- cut here ---- cut here ---- cut here ---- cut here ----
- #! /bin/sh
- # This is a shell archive, meaning:
- # 1. Remove everything above the #! /bin/sh line.
- # 2. Save the resulting text in a file.
- # 3. Execute the file with /bin/sh (not csh) to create:
- # Execute.c
- # MoveResidue.c
- # bm.c
- # This archive created: Fri May 23 05:39:21 1986
- export PATH; PATH=/bin:/usr/bin:$PATH
- if test -f 'Execute.c'
- then
- echo shar: "will not over-write existing file 'Execute.c'"
- else
- cat << \SHAR_EOF > 'Execute.c'
- #include <stdio.h>
- #include "bm.h"
- #include "Extern.h"
- Execute(DescVec, NPats, TextFile, Buffer)
- struct PattDesc *DescVec[]; /* pointers to status vectors for the different
- * patterns, including skip tables, position in buffer, etc. */
- int NPats; /* number of patterns */
- char Buffer[]; /* holds text from file */
- int TextFile; /* file to search */
- {
- long BuffPos; /* offset of first char in Buffer in TextFile */
- int NRead, /* number of chars read from file */
- NWanted, /* number of chars wanted */
- NAvail, /* number of chars actually read */
- BuffSize, /* number of chars in buffer */
- BuffEx, /* flag to indicate that buffer has been searched */
- ResSize,
- /* number of characters in the last, incomplete line */
- Found, /* flag indicates whether pattern found
- * completely and all matches printed */
- Valid; /* was the match "valid", i.e. if -x used,
- * did the whole line match? */
- char *BuffEnd; /* pointer to last char of last complete line */
-
- /*
- * In order to optimize I/O for some machines which impose a severe
- * penalty for I/O on an odd address, we play a nasty game. First, we
- * assume that the Buffer which is passed to us has an even address.
- * Then whenever we move a buffer residual back to the beginning of
- * the Buffer for the next read cycle, we actually move it to Buffer +
- * Odd(Residual) where Odd() is 1 if Residual is odd, zero otherwise.
- * This causes the the next read to go down on an even address. We
- * keep track of the beginning of data in the Buffer with BuffStart.
- */
- char *BuffStart;
-
- /* misc working variables */
- #ifdef notdef
- int i;
- #else !notdef
- register struct PattDesc *Desc;
- struct PattDesc **Vec, **LastPatt = DescVec+NPats;
- #endif notdef
-
- /* initialize */
- ResSize = 0;
- Found = 0;
- BuffPos = 0;
- BuffStart = Buffer;
- #ifdef notdef
- for (i=0; i < NPats; i++) {
- DescVec[i] -> Success = 0;
- DescVec[i] -> Start = Buffer;
- } /* for */
- #else !notdef
- for (Vec=DescVec; Vec < LastPatt; Vec++) {
- Desc = *Vec;
- Desc->Success = 0;
- Desc->Start = Buffer;
- }
- #endif notdef
- /* now do the searching */
- do {
- /* first, read a bufferfull and set up the variables */
- /*
- * Some systems *even* get upset when you ask for an odd read
- * length - ARGH!
- */
- NWanted = (int)((unsigned)(MAXBUFF - ResSize) & ~1);
- NRead = 0;
- do {
- /*
- * BuffStart+ResSize+BRead is even first time through
- * this loop - afterwards, no guaranties, but for
- * files this loop never goes more than once ...
- * Can't do any better.
- */
- NAvail =
- read(TextFile,BuffStart + ResSize + NRead, NWanted);
- if (NAvail == -1) {
- fprintf(stderr,
- "bm: error reading from input file\n");
- exit(2);
- } /* if */
- NRead += NAvail; NWanted -= NAvail;
- } while (NAvail && NWanted);
- BuffEx = 0;
- BuffSize = ResSize + NRead;
- BuffEnd = BuffStart + BuffSize - 1;
- /* locate the end of the last complete line */
- while (*BuffEnd != '\n' && BuffEnd >= BuffStart)
- --BuffEnd;
- if (BuffEnd < BuffStart)
- BuffEnd = BuffStart + BuffSize - 1;
- while (!BuffEx) { /* work through one buffer full */
- BuffEx = 1; /* set it provisionally, then clear
- * it if we find the buffer non-empty */
- #ifdef notdef
- for (i=0; i< NPats; i++) {
- if (!DescVec[i]->Success)
- /* if the pattern has not been found */
- DescVec[i]-> Success =
- Search(DescVec[i]->Pattern,
- DescVec[i]->PatLen, BuffStart, BuffEnd,
- DescVec[i]->Skip1, DescVec[i]->Skip2,
- DescVec[i]);
- if (DescVec[i]->Success){
- /* if a match occurred */
- BuffEx = 0;
- Valid = MatchFound(DescVec[i],BuffPos,
- BuffStart, BuffEnd);
- Found |= Valid;
- if ((sFlag || lFlag) && Found)
- return(0);
- } /* if */
- } /* for */
- #else !notdef
- for (Vec=DescVec; Vec < LastPatt; Vec++) {
- Desc = *Vec;
- if (!Desc->Success)
- /* if the pattern has not been found */
- Desc-> Success =
- Search(Desc->Pattern,
- Desc->PatLen, BuffStart, BuffEnd,
- Desc->Skip1, Desc->Skip2,
- Desc);
- if (Desc->Success){
- /* if a match occurred */
- BuffEx = 0;
- Valid = MatchFound(Desc,BuffPos,
- BuffStart, BuffEnd);
- Found |= Valid;
- if ((sFlag || lFlag) && Found)
- return(0);
- } /* if */
- } /* for */
- #endif notdef
- } /* while */
- if(NRead) {
- ResSize = MoveResidue(DescVec,NPats,Buffer,&BuffStart,BuffEnd);
- BuffPos += BuffSize - ResSize;
- } /* if */
- } while (NRead);
- return(!Found);
- } /* Execute */
- SHAR_EOF
- fi
- if test -f 'MoveResidue.c'
- then
- echo shar: "will not over-write existing file 'MoveResidue.c'"
- else
- cat << \SHAR_EOF > 'MoveResidue.c'
- #include "bm.h"
- /* Moves the text which has not been completely searched at the end of
- * the buffer to the beginning of the buffer
- * and returns number of bytes moved */
-
- /*
- * In coordination with the I/O optimization code in Execute, if the Residual
- * is odd in length, we move it to Buffer+1, otherwise to Buffer+0, to make
- * sure that [at least] the next read goes down on an even address. A pointer
- * to a pointer to the current start of data in the buffer is passed in, that
- * pointer is updated and passed back.
- */
-
- int MoveResidue(DescVec, NPats, Buffer, BuffStartP, BuffEnd)
- struct PattDesc *DescVec[];
- int NPats;
- char *Buffer, **BuffStartP, *BuffEnd;
- {
- char *FirstStart, *f, *t, *Residue;
- int ResSize, i;
- FirstStart = BuffEnd;
- /* find the earliest point which has not been
- * completely searched */
- for (i=0; i < NPats; i++) {
- FirstStart =
- min(FirstStart, DescVec[i]-> Start );
- } /* for */
- /* now scan to the beginning of the line containing the
- * unsearched text */
- for (Residue = FirstStart; *Residue != '\n' &&
- Residue >= *BuffStartP; Residue--);
- if (Residue < *BuffStartP)
- Residue = FirstStart;
- else ++Residue;
- ResSize = (BuffEnd - Residue + 1);
- /* now move the residue to the beginning of
- * the file */
- t = *BuffStartP = ((unsigned) ResSize & 1) ? Buffer+1 : Buffer;
- f = Residue;
- for(i=ResSize;i;--i)
- *t++ = *f++;
- /* now fix the status vectors */
- for (i=0; i < NPats; i++) {
- DescVec[i]->Start -= (Residue - *BuffStartP);
- } /* for */
- return(ResSize);
- } /* MoveResidue */
- SHAR_EOF
- fi
- if test -f 'bm.c'
- then
- echo shar: "will not over-write existing file 'bm.c'"
- else
- cat << \SHAR_EOF > 'bm.c'
- #include <stdio.h>
- #include <sys/types.h> /* XXXX ADDED FOR BSD2.9 */
- #include <sys/file.h>
- #include <strings.h>
- #include "bm.h"
- #include "Extern.h"
- main(argc,argv)
- int argc;
- char *argv[];
- {
- /* test driver for grep based on Boyer-Moore algorithm */
- char BigBuff[MAXBUFF + 3];
- /*
- * We leave one extra character at the beginning and end of the buffer,
- * but don't tell Execute about it. This is so when someone is
- * scanning the buffer and scans past the end (or beginning)
- * we are still technically in the buffer (picky, but there ARE
- * machines which would complain). We then leave an *additional*
- * free byte at the begining so we can pass an even address to Execute
- * (on some machines, odd address I/O can *COMPLETELY* destroy any
- * speed benefits of bm).
- */
- int ret = 1, /* return code from Execute */
- NotFound = 0, /* non-zero if file not readable */
- NFiles,
- NPats; /* number of patterns to search for */
- char i,
- *FlagPtr,
- **OptPtr; /* used to scan command line */
- int TextFile /* file to search */;
- struct PattDesc *DescVec[MAXPATS];
-
- --argc;
- OptPtr = argv + 1;
- while ( argc && **OptPtr == '-') {
- FlagPtr = *OptPtr + 1;
- while (*FlagPtr) {
- switch (*FlagPtr) {
- case 'c': cFlag = 1; break;
- case 'e': eFlag = 1;
- /* get the patterns from next arg */
- NPats = MkDescVec(DescVec,*++OptPtr);
- --argc;
- break;
- case 'f': fFlag = 1;
- /* read the patterns from a file */
- NPats = GetPatFile(*++OptPtr,DescVec);
- --argc;
- break;
- case 'l': lFlag = 1; break;
- case 'n': nFlag = 1; break;
- case 's': sFlag = 1; break;
- case 'x': xFlag = 1; break;
- case 'h': hFlag = 1; break;
- default:
- fprintf(stderr,
- "bm: invalid option: -%c \n",
- *FlagPtr);
- PutUsage();
- exit(2);
- } /* switch */
- ++FlagPtr;
- } /* while */
- ++OptPtr; --argc;
- } /* while */
- /* OptPtr now points to patterns */
- if (!fFlag && !eFlag) {
- if (!argc) {
- fprintf(stderr,"bm: no pattern specified\n");
- PutUsage();
- exit(2);
- } else
- NPats = MkDescVec(DescVec,*OptPtr);
- ++OptPtr; --argc;
- }
- /* OptPtr now points to first file */
- NFiles = argc;
- if (!NFiles)
- ret &= Execute(DescVec,NPats,0,BigBuff+2);
- else while (argc--) {
- if ((NFiles > 1) || lFlag) FileName = *OptPtr;
- if ((TextFile = open(*OptPtr,O_RDONLY,0)) < 0) {
- fprintf(stderr,"bm: can't open %s\n",*OptPtr);
- NotFound++;
- } else {
- ret &= Execute(DescVec,NPats,TextFile,BigBuff+2);
- if (sFlag && !ret)
- exit(0);
- close(TextFile);
- } /* if */
- ++OptPtr;
- } /* while */
- if (cFlag) printf("%d\n",MatchCount);
- exit(NotFound ? 2 : ret);
- } /* main */
- SHAR_EOF
- fi
- exit 0
- # End of shell archive
-
-
-